import pandas as pd;
import numpy as np;
import plotly.express as plx
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Load the raw CSV exports; paths are relative to the working directory.
artists = pd.read_csv("data/artists.csv")
tracks = pd.read_csv("data/tracks.csv")

# Keep the text before the first "-" of release_date as the (string) year.
# The vectorised .str accessor leaves missing dates as NaN instead of
# raising AttributeError the way a per-row `e.split(...)` would.
# NOTE(review): assumes release_date is text such as "YYYY-MM-DD" or "YYYY"
# -- confirm against the CSV.
tracks["release_year"] = tracks["release_date"].str.split("-", n=1).str[0]
def playSong(id):
    """Print the Spotify embed-player URL for a track.

    Args:
        id: Track identifier, inserted verbatim into the URL path. The name
            shadows the ``id`` builtin but is kept so existing keyword
            callers keep working.

    Returns:
        None. The URL is written to standard output.
    """
    print(f"https://open.spotify.com/embed/track/{id}?utm_source=generator")
Most popular song
# Track(s) holding the highest popularity score in the dataset. Comparing
# against the observed maximum (rather than a hard-coded 100) still returns
# rows when no track reaches the top of the scale.
tracks.loc[tracks["popularity"] == tracks["popularity"].max(), ["name", "artists"]]
| | name | artists |
|---|---|---|
| 93802 | Peaches (feat. Daniel Caesar & Giveon) | ['Justin Bieber', 'Daniel Caesar', 'Giveon'] |
# Mean popularity per release year, one row per year.
yearly_mean = tracks.groupby("release_year", as_index=False)["popularity"].mean()

# Drop the years the original analysis flags as incorrect data.
excluded_years = ["1900", "2021"]
df = yearly_mean.loc[~yearly_mean["release_year"].isin(excluded_years)]

# Each year has exactly one row, so histfunc="avg" plots the mean as-is.
fig = plx.histogram(df, x="release_year", y="popularity", histfunc="avg")
fig.update_xaxes(title_text="Release Year")
fig.update_yaxes(title_text="Avg Popularity")
fig.show()
# Average danceability per 10-BPM tempo bucket.

# Ignore tracks with zero danceability and work on an explicit copy so the
# column assignments below never write into a view of `tracks`.
df = tracks.loc[tracks["danceability"] != 0].sort_values(by="tempo").copy()
df["tempo"] = df["tempo"].round()

# Keep only tempos strictly between 60 and 220 (after rounding).
df = df.loc[(df["tempo"] > 60) & (df["tempo"] < 220)].copy()

# Label each track with its bucket, e.g. tempo 123 -> "(120-130)".
# One arithmetic pass replaces relabelling the frame once per bucket.
bucket_start = (df["tempo"] // 10 * 10).astype(int)
df["tempoType"] = bucket_start.map(lambda lo: f"({lo}-{lo + 10})")

# The labels are strings, so sorting them would put "(100-110)" before
# "(60-70)". Give plotly the numeric bucket order explicitly, restricted to
# buckets that actually occur in the data.
tempo_order = [f"({lo}-{lo + 10})" for lo in sorted(bucket_start.unique())]

fig = plx.histogram(
    df,
    x="tempoType",
    y="danceability",
    histfunc="avg",
    category_orders={"tempoType": tempo_order},
)
fig.layout["yaxis"]["title"] = "Avg Danceability"
fig.layout["xaxis"]["title"] = "Tempo Range"
fig.show()